required_packages <- c("readxl", "reshape2", "dplyr","ggplot2")
# Install and load packages if not already installed
for (package in required_packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}

# Import the cohort data frame unless one is already loaded.
# A bare exists("df") is always TRUE because stats::df() (the F-distribution
# density) is on the search path, so the file would never be read in a fresh
# session. Restricting the lookup to mode "list" (the mode of data frames and
# tibbles) ignores that function.
if (!exists("df", mode = "list")) {
  file_path <- "~/cohort.xlsx"
  # Cells containing "#N/A" are read as missing values.
  df <- read_excel(file_path, sheet = "cohort", na = "#N/A")
}

# Keep only the SUV measurements, the side indicator and the PET distance,
# and coerce every kept column to numeric.
suv_input_columns <- c(
  "r_upper_suv_max", "r_upper_suv_mean", "r_upper_suv_sd",
  "r_lower_suv_max", "r_lower_suv_mean", "r_lower_suv_sd",
  "l_upper_suv_max", "l_upper_suv_mean", "l_upper_suv_sd",
  "l_lower_suv_max", "l_lower_suv_mean", "l_lower_suv_sd",
  "side_r0_l1", "pet_distance_ic"
)
df_filtered <- df[, suv_input_columns]
df_filtered[] <- lapply(df_filtered, as.numeric)

# Keep rows whose pet_distance_ic lies in [-1, 365].
# NOTE(review): rows with a missing pet_distance_ic are dropped here. The
# original first filter explicitly kept them, but the second filter
# (pet_distance_ic >= -1) removed them again - confirm which was intended.
df_filtered <- subset(
  df_filtered,
  !is.na(pet_distance_ic) & pet_distance_ic >= -1 & pet_distance_ic <= 365
)

# Add the result columns, initialised to NA.
cols <- c("no_tumor_suv_mean", "suv_95", "suv_lung_mean", "suv_lung_max",
          "suv_95_both", "suv_95_upper", "suv_95_lower")
df_filtered[, cols] <- NA

#calculate SUV

# For one lung region ("upper" or "lower"): when the mean SUV of one side is
# missing and the other side has one, copy the available side's mean and SD
# into the missing side. Returns the modified data frame.
mirror_missing_side <- function(data, region) {
  l_mean <- paste0("l_", region, "_suv_mean")
  l_sd <- paste0("l_", region, "_suv_sd")
  r_mean <- paste0("r_", region, "_suv_mean")
  r_sd <- paste0("r_", region, "_suv_sd")

  # Decide both directions before writing anything; a row can need at most
  # one of them, so the two masks never overlap.
  fill_left <- is.na(data[[l_mean]]) & !is.na(data[[r_mean]])
  fill_right <- is.na(data[[r_mean]]) & !is.na(data[[l_mean]])

  data[[l_mean]][fill_left] <- data[[r_mean]][fill_left]
  data[[l_sd]][fill_left] <- data[[r_sd]][fill_left]
  data[[r_mean]][fill_right] <- data[[l_mean]][fill_right]
  data[[r_sd]][fill_right] <- data[[l_sd]][fill_right]
  data
}

# Per-row average of the lower and upper mean SUV of the side selected by
# side_r0_l1 (1 -> left-lung columns, 0 -> right-lung columns). Rows with a
# missing side indicator, or a missing lower mean on the selected side, stay NA.
compute_no_tumor_mean <- function(data) {
  result <- rep(NA_real_, nrow(data))
  side <- data$side_r0_l1

  use_left <- !is.na(side) & side == 1 & !is.na(data$l_lower_suv_mean)
  use_right <- !is.na(side) & side == 0 & !is.na(data$r_lower_suv_mean)

  # mean(a, b, na.rm = TRUE) would treat b as the `trim` argument and return
  # only a (or fail when b is NA), so average both values explicitly.
  left_mean <- rowMeans(
    cbind(data$l_lower_suv_mean, data$l_upper_suv_mean),
    na.rm = TRUE
  )
  right_mean <- rowMeans(
    cbind(data$r_lower_suv_mean, data$r_upper_suv_mean),
    na.rm = TRUE
  )

  result[use_left] <- left_mean[use_left]
  result[use_right] <- right_mean[use_right]
  result
}

df_filtered <- mirror_missing_side(df_filtered, "upper")
df_filtered <- mirror_missing_side(df_filtered, "lower")
df_filtered$no_tumor_suv_mean <- compute_no_tumor_mean(df_filtered)

# Row-wise mean of the named df_filtered columns, ignoring missing values
# (a row where every input is NA yields NaN).
row_mean_of <- function(...) {
  rowMeans(df_filtered[, c(...)], na.rm = TRUE)
}

# Average the right and left measurements for each lung region.
df_filtered$upper_suv_mean <- row_mean_of("r_upper_suv_mean", "l_upper_suv_mean")
df_filtered$upper_suv_max <- row_mean_of("r_upper_suv_max", "l_upper_suv_max")
df_filtered$upper_suv_sd <- row_mean_of("r_upper_suv_sd", "l_upper_suv_sd")
df_filtered$lower_suv_mean <- row_mean_of("r_lower_suv_mean", "l_lower_suv_mean")
df_filtered$lower_suv_max <- row_mean_of("r_lower_suv_max", "l_lower_suv_max")
df_filtered$lower_suv_sd <- row_mean_of("r_lower_suv_sd", "l_lower_suv_sd")

# Average the upper and lower regions to get whole-lung values.
# NOTE(review): the SD columns are plain averages of SDs, not pooled SDs -
# confirm this is the intended definition.
df_filtered$suv_lung_mean <- row_mean_of("upper_suv_mean", "lower_suv_mean")
df_filtered$suv_lung_max <- row_mean_of("upper_suv_max", "lower_suv_max")
df_filtered$suv_lung_sd <- row_mean_of("upper_suv_sd", "lower_suv_sd")

# SUV95 = mean + z * sd, with z the 0.95 quantile of the standard normal.
# Computed on whole columns so that a zero-row data frame is handled too
# (a 1:nrow() loop would run once on an empty frame and fail).
z_95 <- qnorm(0.95)
df_filtered$suv_95_both <- df_filtered$suv_lung_mean + (z_95 * df_filtered$suv_lung_sd)
df_filtered$suv_95_upper <- df_filtered$upper_suv_mean + (z_95 * df_filtered$upper_suv_sd)
df_filtered$suv_95_lower <- df_filtered$lower_suv_mean + (z_95 * df_filtered$lower_suv_sd)

# Rename the derived columns to the descriptive labels used in the plot.
df_filtered <- rename(
  df_filtered,
  SUV95 = suv_95,
  `Mean SUVmean in both lungs` = suv_lung_mean,
  `Mean SUVmax in both lungs` = suv_lung_max,
  `Mean SUV95 in both lungs` = suv_95_both,
  `Mean SUVmean in upper lung` = upper_suv_mean,
  `Mean SUVmax in upper lung` = upper_suv_max,
  `Mean SUV95 in upper lung` = suv_95_upper,
  `Mean SUVmean in lower lung` = lower_suv_mean,
  `Mean SUVmax in lower lung` = lower_suv_max,
  `Mean SUV95 in lower lung` = suv_95_lower
)

# Keep a running row index plus the nine labelled measures, in this order.
comparison_df <- mutate(df_filtered, index = row_number())
comparison_df <- select(
  comparison_df,
  index,
  `Mean SUVmean in both lungs`,
  `Mean SUVmax in both lungs`,
  `Mean SUV95 in both lungs`,
  `Mean SUVmean in upper lung`,
  `Mean SUVmax in upper lung`,
  `Mean SUV95 in upper lung`,
  `Mean SUVmean in lower lung`,
  `Mean SUVmax in lower lung`,
  `Mean SUV95 in lower lung`
)

# Reshape to long format: one row per (index, measure) pair.
comparison_df_long <- melt(
  comparison_df,
  id.vars = "index",
  variable.name = "SUV_Mean_Type",
  value.name = "SUV_Mean_Value"
)

# Build a boxplot with one box per SUV measure.
# NOTE(review): the plot object is only stored in `boxplot`; nothing in this
# section prints or saves it.
boxplot <- ggplot(
  data = comparison_df_long,
  mapping = aes(x = SUV_Mean_Type, y = SUV_Mean_Value)
)
# Black-outlined white boxes; missing values are dropped without a warning.
boxplot <- boxplot +
  geom_boxplot(fill = "white", color = "black", na.rm = TRUE)
# Only the y axis carries a label; title and x label are left blank.
boxplot <- boxplot +
  labs(title = "", x = "", y = "Standardized Uptake Value")
# Black-and-white theme, with the x tick labels rotated to keep them readable.
boxplot <- boxplot +
  theme_bw() +
  theme(axis.text.x = element_text(hjust = 1, angle = 55))

# Maximum and minimum of every SUV measure, ignoring missing values.
summary_df <- summarize(
  group_by(comparison_df_long, SUV_Mean_Type),
  Maximum = max(SUV_Mean_Value, na.rm = TRUE),
  Minimum = min(SUV_Mean_Value, na.rm = TRUE)
)

# Build one "<lead-in> <min> g/ml and <max> <closing>" sentence from a row of
# summary_df, with both values rounded to two decimals.
describe_suv_range <- function(lead_in, row, closing) {
  lowest <- round(summary_df$Minimum[row], 2)
  highest <- round(summary_df$Maximum[row], 2)
  paste(lead_in, lowest, "g/ml and", highest, closing)
}

# Rows 1-3 are read positionally as the SUVmean, SUVmax and SUV95 of both
# lungs - presumably the order in which the measures were selected before
# reshaping; verify if that column order ever changes.
print(describe_suv_range("Mean SUVmean of the lung varied between", 1, "g/ml."))
print(describe_suv_range("Mean SUVmax of the lung varied between", 2, "g/ml,"))
print(describe_suv_range("mean SUV95 varied between", 3, "g/ml."))
